* Load the merged ACS / Census sample; every later step modifies it in memory.
use "$temp/acs_census_merged_sample", clear

***2000 commuting zone (CZ) and CZ-level shock
* 2000 county FIPS = state code followed by county code (string concatenation), then made numeric
generate fips = state_2000 + county_2000
destring fips, replace

* Look up the CZ of the 2000 county; rows without a crosswalk match are kept
merge m:1 fips using "$data/Other/cz_xwalk/cz_xwalk_small", keep(master match) nogenerate //2000 CZ
* Bring in the shock variables keyed on that CZ; unmatched rows are kept here too
merge m:1 cz using "$temp/cz_shocks", keep(master match) nogenerate //get CZ shocks

* Free up the generic names fips and cz so they can be reused for current residence
rename fips fips_2000
rename cz cz_2000

***current CZ
* Current county FIPS from the string codes st and cty, then the same crosswalk lookup
generate fips = st + cty
destring fips, replace
merge m:1 fips using "$data/Other/cz_xwalk/cz_xwalk_small", keep(master match) nogenerate

* Migration-rate check (display only) on 2007 rows with dby >= 1990 that matched the long form:
* first the size of that group, then how many of them have the same CZ as in 2000
count if merge_long == 3 & year == 2007 & dby >= 1990
count if merge_long == 3 & year == 2007 & dby >= 1990 & cz == cz_2000 //analysis of migration rate

****inputs for the weight adjustment: inclusion flag and race/ethnicity cells
* included = 1 for rows matched to the long form (merge_long == 3), 0 otherwise
generate included = (merge_long == 3)

* race: 1 = everyone not recoded below (labelled white), 2 = black, 3 = asian
generate race = cond(imprc == " 4", 3, cond(imprc == " 2", 2, 1))

* hispanic dummy; it overrides the race code assigned above
* NOTE(review): a missing his also satisfies his > 101 and would be flagged hispanic - confirm his is never missing
destring his, replace
generate hispan = (his > 101)
replace race = 4 if hispan

* drop Puerto Rico (state code 72)
drop if st == "72"

//probability of being included in the main sample, by state x sex x race cell
//(the weighted mean of the 0/1 inclusion flag within each cell, saved as adjust)
preserve
collapse (mean) included [fw = pwgt], by(st sex race)
keep included st sex race
rename included adjust
save "$temp/sample_weight_adjust", replace
restore

//merge the cell-level inclusion rates back on and adjust weights:
//new weight = original weight * (overall inclusion rate / cell inclusion rate)
merge m:1 st sex race using "$temp/sample_weight_adjust", keep(match) nogen
summarize included [fw = pwgt] //overall chance of being included in sample
generate pwgt2 = pwgt * `r(mean)'
//pwgt2 becomes missing where adjust == 0, i.e. cells with no included observations
replace pwgt2 = pwgt2 / adjust
//stored as long, not int: int cannot hold values above 32,740 and generate would
//silently store missing for any larger adjusted weight
generate long weight = round(pwgt2) //integer

***deflate earnings and final cleanup
* Attach the yearly pce index; rows whose year has no pce entry are dropped (keep(match))
merge m:1 year using "$data/Other/pce", keep(match) nogenerate

* ti and wag: the value 999999 is set to missing, then the variable is rescaled by 100 / pce
foreach v of varlist ti wag {
	replace `v' = . if `v' == 999999
	replace `v' = `v' / pce * 100 //deflate
}
* hrlywage is deflated the same way, without the 999999 step
replace hrlywage = hrlywage / pce * 100

*****prepare dependent variables*****
***income
//negative total income is floored at zero
replace ti = 0 if ti<0

***employment
//emp = 100 when esr is 1, 2, 4 or 5, else 0
gen emp = 0
replace emp = 1 if inlist(esr, "1", "2", "4", "5")
replace emp = emp*100

***school attendance
//enroll = 100 when sch is 2 or 3, else 0
gen enroll = 0
replace enroll = 1 if sch == "2" | sch == "3"
//full variable name: the abbreviation enrol only resolves while varabbrev is on and unambiguous
replace enroll = enroll * 100

****LFP intensity
//wkh needs no fixing
//Stata treats missing as larger than any number, so without the !missing() guard a row with
//missing wkw and wkh would satisfy wkw>26 & wkh>20 and be coded as working more than part time
gen gt_part_time = (wkw>26 & wkh>20) & !missing(wkw, wkh)
//2008-or-later version: wkw is compared against 5 instead of 26
//NOTE(review): presumably wkw is recorded in intervals from 2008 on - confirm against the codebook
replace gt_part_time = (wkw<5 & wkh>20) & !missing(wkw, wkh) if year>=2008
replace gt_part_time = gt_part_time*100

***years of schooling and degree attainment
//schl is a categorical attainment code whose coding changes in 2008, so both the
//years-of-schooling mapping and the attainment cutoffs are defined twice
destring schl, replace

//2007 and earlier: codes below 9 map to schl + 4 years, codes 9-16 are set explicitly
gen yr_schl = schl + 4
replace yr_schl = 12 if schl == 9
replace yr_schl = 12 if schl == 10
replace yr_schl = 13 if schl == 11
replace yr_schl = 14 if schl == 12
replace yr_schl = 16 if schl == 13
replace yr_schl = 18 if schl == 14
replace yr_schl = 19 if schl == 15
replace yr_schl = 23 if schl == 16

//attainment dummies (2007 and earlier cutoffs)
//left missing when schl is missing: missing compares as larger than any number, so an
//unguarded (schl >= k) would flag those rows as holding every degree
gen lths = (schl<9) if !missing(schl)
gen hs = (schl >= 9) if !missing(schl)
gen somecoll = (schl>=10) if !missing(schl)
gen assoc = (schl >= 12) if !missing(schl)
gen adv = (schl >13) if !missing(schl)
gen coll = (schl>=13) if !missing(schl) //at least college dummy

//2008 and later: codes below 16 map to schl - 3 years, codes 16-24 are set explicitly
replace yr_schl = schl - 3 if year>=2008
//codes 1 and 2 would otherwise give -2 and -1 years, which also inflates exp below
replace yr_schl = 0 if yr_schl<0 & year>=2008
replace yr_schl = 12 if schl == 16 & year>=2008
replace yr_schl = 12 if schl == 17 & year>=2008
replace yr_schl = 12 if schl == 18 & year>=2008
replace yr_schl = 13 if schl == 19 & year>=2008
replace yr_schl = 14 if schl == 20 & year>=2008
replace yr_schl = 16 if schl == 21 & year>=2008
replace yr_schl = 18 if schl == 22 & year>=2008
replace yr_schl = 19 if schl == 23 & year>=2008
replace yr_schl = 23 if schl == 24 & year>=2008

//attainment dummies (2008 and later cutoffs); rows with missing schl stay missing
replace lths = (schl<16) if year>=2008 & !missing(schl)
replace hs = (schl >= 16) if year>=2008 & !missing(schl)
replace somecoll = (schl>=18) if year>=2008 & !missing(schl)
replace assoc = (schl >= 20) if year>=2008 & !missing(schl)
replace adv = (schl >21) if year>=2008 & !missing(schl)
replace coll = (schl>=21) if year>=2008 & !missing(schl) //at least college dummy

//years of potential experience: age - years of schooling - 6, floored at zero
gen exp = (age - yr_schl - 6)
replace exp = 0 if exp<0

***migration
//100 if the current CZ differs from the 2000 CZ, 0 if it is the same.
//Both crosswalk merges keep unmatched rows, so either CZ can be missing; the indicator is
//left missing in that case instead of counting an unknown CZ as a move
gen mig_cz = (cz!=cz_2000)*100 if !missing(cz, cz_2000)

****other demographics
//race indicators from imprc (the stored codes carry a leading space)
gen black = imprc == " 2"
gen asian = imprc == " 4"
//sex is converted to numeric and shifted down by one
destring sex, replace
replace sex = sex - 1
//convert remaining string codes to numeric
destring state_2000, replace
destring pov, replace

//age groups: 1 = 17-19, 2 = 20-22, 3 = 23-25, 4 = 26-28, 0 = any other age
capture drop age_group
generate age_group = 0
forvalues g = 1/4 {
	//three-year bands starting at age 17
	local lo = 14 + 3*`g'
	local hi = `lo' + 2
	replace age_group = `g' if inrange(age, `lo', `hi')
}

//one variable per age group: cz_shock for members of the group, 0 for everyone else
forvalues g = 1/4 {
	generate age_dum_`g' = (age_group == `g') * cz_shock
}

//education dummies: rescale from 0/1 to 0/100
foreach educ of varlist lths hs somecoll assoc adv coll {
	replace `educ' = `educ' * 100
}

//background characteristics of the household head
destring qhigh_head, replace
//head has attainment code 13 or higher; left missing when the code is missing, because
//missing compares as larger than any number and would otherwise be coded as 1
gen head_coll = (qhigh_head>=13) if !missing(qhigh_head)
gen head_mar = (qms_head == "1")

////dummy for housing
//1 when 2000 tenure is code 1 or 2 and the house value is positive
//NOTE(review): a missing house_value also satisfies house_value>0 - confirm that is intended
gen own_house = 0
replace own_house = 1 if (stenure_2000 == "1" | stenure_2000 == "2") & (house_value>0) //dummy for household having home equity in 2000

//dummy for head working in construction or manufacturing
gen head_constr_manu = 0
destring qind_head, replace
//industry codes 77 through 406 inclusive; a missing code fails the upper bound and stays 0
//NOTE(review): an earlier comment listed 3 = construction and 4-23 = manufacturing, which does
//not match this range - confirm 77-406 is the intended span for the coding scheme in use
replace head_constr_manu = 1 if qind_head>=77 & qind_head<=406

**deal with outliers related to income, wages, and hours
//total income (ti), wage income (wag) and hourly wage (hrlywage) get the same treatment:
//any value above the variable's 99th percentile is replaced by 1.5 times that percentile
foreach v of varlist ti wag hrlywage {
	summarize `v', detail
	replace `v' = `r(p99)'*1.5 if `v' != . & `v' > `r(p99)'
}

//log versions (zero or negative inputs produce missing)
generate logti = ln(ti)
generate logwag = ln(wag)
generate loghrwg = ln(hrlywage)
generate logwkh = ln(wkh)

//save the full cleaned file (all rows), used for summary stats later
compress
save "$temp/acs_longform_merged_clean_all", replace

//analysis file: drop everyone who was not matched in the long form, then save
drop if merge_long != 3
compress
save "$temp/acs_longform_merged_clean", replace



//end of dofile
